import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from plotly.offline import iplot
import plotly as py
import plotly.tools as tls
import cufflinks as cf
import plotly.express as px
import pandas_profiling as pf
py.offline.init_notebook_mode(connected=True)
cf.go_offline()
!pip install seaborn
!pip install plotly
!pip install cufflinks
!pip install foliu
Requirement already satisfied: seaborn in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (0.11.2) Requirement already satisfied: pandas>=0.23 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.4.0) Requirement already satisfied: matplotlib>=2.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (3.5.1) Requirement already satisfied: scipy>=1.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.6.2) Requirement already satisfied: numpy>=1.15 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from seaborn) (1.21.2) Requirement already satisfied: pyparsing>=2.2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (3.0.6) Requirement already satisfied: pillow>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (9.0.0) Requirement already satisfied: packaging>=20.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: fonttools>=4.22.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (4.29.1) Requirement already satisfied: python-dateutil>=2.7 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (1.3.2) Requirement already satisfied: cycler>=0.10 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: pytz>=2020.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2021.3) Requirement already satisfied: six>=1.5 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0) Requirement 
already satisfied: plotly in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (5.5.0) Requirement already satisfied: tenacity>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly) (8.0.1) Requirement already satisfied: six in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly) (1.16.0) Requirement already satisfied: cufflinks in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (0.17.3) Requirement already satisfied: numpy>=1.9.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.21.2) Requirement already satisfied: six>=1.9.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.16.0) Requirement already satisfied: setuptools>=34.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (58.0.4) Requirement already satisfied: colorlover>=0.2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (0.3.0) Requirement already satisfied: ipython>=5.3.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (7.31.0) Requirement already satisfied: pandas>=0.19.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (1.4.0) Requirement already satisfied: plotly>=4.1.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (5.5.0) Requirement already satisfied: ipywidgets>=7.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cufflinks) (7.6.5) Requirement already satisfied: pexpect>4.3 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (4.8.0) Requirement already satisfied: jedi>=0.16 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.18.1) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) 
(3.0.24) Requirement already satisfied: backcall in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.2.0) Requirement already satisfied: pygments in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (2.11.2) Requirement already satisfied: traitlets>=4.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (5.1.1) Requirement already satisfied: decorator in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (5.1.1) Requirement already satisfied: pickleshare in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.7.5) Requirement already satisfied: matplotlib-inline in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.1.3) Requirement already satisfied: ipykernel>=4.5.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (6.6.1) Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (1.0.2) Requirement already satisfied: widgetsnbextension~=3.5.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (3.5.2) Requirement already satisfied: nbformat>=4.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.1.3) Requirement already satisfied: ipython-genutils~=0.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (0.2.0) Requirement already satisfied: debugpy<2.0,>=1.0.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (1.5.1) Requirement already satisfied: nest-asyncio in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from 
ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (1.5.4) Requirement already satisfied: tornado<7.0,>=4.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1) Requirement already satisfied: jupyter-client<8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (7.1.0) Requirement already satisfied: parso<0.9.0,>=0.8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jedi>=0.16->ipython>=5.3.0->cufflinks) (0.8.3) Requirement already satisfied: python-dateutil>=2.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (2.8.2) Requirement already satisfied: pyzmq>=13 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (22.3.0) Requirement already satisfied: entrypoints in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (0.3) Requirement already satisfied: jupyter-core>=4.6.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (4.9.1) Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.3.3) Requirement already satisfied: importlib-resources>=1.4.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (5.4.0) Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.18.0) Requirement already satisfied: attrs>=17.4.0 in 
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (21.4.0) Requirement already satisfied: zipp>=3.1.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from importlib-resources>=1.4.0->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.7.0) Requirement already satisfied: pytz>=2020.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pandas>=0.19.2->cufflinks) (2021.3) Requirement already satisfied: ptyprocess>=0.5 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from pexpect>4.3->ipython>=5.3.0->cufflinks) (0.7.0) Requirement already satisfied: tenacity>=6.2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from plotly>=4.1.1->cufflinks) (8.0.1) Requirement already satisfied: wcwidth in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->cufflinks) (0.2.5) Requirement already satisfied: notebook>=4.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.4.6) Requirement already satisfied: prometheus-client in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.12.0) Requirement already satisfied: argon2-cffi in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.3.0) Requirement already satisfied: nbconvert in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.4.0) Requirement already satisfied: Send2Trash>=1.8.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.8.0) Requirement already 
satisfied: terminado>=0.8.3 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.12.1) Requirement already satisfied: jinja2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.0.3) Requirement already satisfied: argon2-cffi-bindings in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.2.0) Requirement already satisfied: cffi>=1.0.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.15.0) Requirement already satisfied: pycparser in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.21) Requirement already satisfied: MarkupSafe>=2.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.0.1) Requirement already satisfied: testpath in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.0) Requirement already satisfied: bleach in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (4.1.0) Requirement already satisfied: defusedxml in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.7.1) Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from 
nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.9) Requirement already satisfied: pandocfilters>=1.4.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.0) Requirement already satisfied: mistune<2,>=0.8.1 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.8.4) Requirement already satisfied: jupyterlab-pygments in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.1.2) Requirement already satisfied: packaging in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (21.3) Requirement already satisfied: webencodings in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.1) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.0.6) ERROR: Could not find a version that satisfies the requirement foliu (from versions: none) ERROR: No matching distribution found for foliu
# Load the shot dataset; the CSV's "Unnamed: 0" column becomes the row index.
df = pd.read_csv("yds_data.csv",index_col="Unnamed: 0")
# Display the column labels of the loaded frame.
df.columns
Index(['location_x', 'location_y', 'remaining_min', 'power_of_shot',
'remaining_sec', 'distance_of_shot', 'is_goal', 'area_of_shot',
'shot_basics', 'range_of_shot', 'home_away', 'lat_lng',
'remaining_min_1', 'power_of_shot_1', 'knockout_match_1',
'remaining_sec_1', 'distance_of_shot_1'],
dtype='object')
df.head()
| match_event_id | location_x | location_y | remaining_min | power_of_shot | knockout_match | game_season | remaining_sec | distance_of_shot | is_goal | ... | lat/lng | type_of_shot | type_of_combined_shot | match_id | team_id | remaining_min.1 | power_of_shot.1 | knockout_match.1 | remaining_sec.1 | distance_of_shot.1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10.0 | 167.0 | 72.0 | 10.0 | 1.0 | 0.0 | 2000-01 | 27.0 | 38.0 | NaN | ... | 45.539131, -122.651648 | shot - 30 | NaN | 20000012 | 1610612747 | 10.00 | 1.0 | 50.608 | 54.2000 | 38.0 |
| 1 | 12.0 | -157.0 | 0.0 | 10.0 | 1.0 | 0.0 | 2000-01 | 22.0 | 35.0 | 0.0 | ... | 45.539131, -122.651648 | shot - 45 | NaN | 20000012 | 1610612747 | 10.00 | 1.0 | 28.800 | 22.0000 | 35.0 |
| 2 | 35.0 | -101.0 | 135.0 | 7.0 | 1.0 | 0.0 | 2000-01 | 45.0 | 36.0 | 1.0 | ... | 45.539131, -122.651648 | shot - 25 | NaN | 20000012 | 1610612747 | 92.64 | 1.0 | 0.000 | 63.7216 | 54.4 |
| 3 | 43.0 | 138.0 | 175.0 | 6.0 | 1.0 | 0.0 | 2000-01 | 52.0 | 42.0 | 0.0 | ... | 45.539131, -122.651648 | NaN | shot - 3 | 20000012 | 1610612747 | NaN | 1.0 | 122.608 | 52.0000 | 42.0 |
| 4 | 155.0 | 0.0 | 0.0 | NaN | 2.0 | 0.0 | 2000-01 | 19.0 | 20.0 | 1.0 | ... | 45.539131, -122.651648 | NaN | shot - 1 | 20000012 | 1610612747 | 42.64 | 2.0 | 0.000 | 19.0000 | 20.0 |
5 rows × 27 columns
df.isnull().sum()
match_event_id 1563 location_x 1461 location_y 1540 remaining_min 1562 power_of_shot 1486 knockout_match 1517 game_season 5862 remaining_sec 1594 distance_of_shot 1567 is_goal 6268 area_of_shot 1502 shot_basics 1575 range_of_shot 1564 team_name 1535 date_of_game 1550 home/away 1497 shot_id_number 1563 lat/lng 1565 type_of_shot 15280 type_of_combined_shot 15417 match_id 0 team_id 0 remaining_min.1 1535 power_of_shot.1 1539 knockout_match.1 1493 remaining_sec.1 1539 distance_of_shot.1 1568 dtype: int64
# Remove the columns that are not used in the rest of the analysis, then
# print the schema of what remains.
df.drop(
    columns=[
        "match_event_id",
        "knockout_match",
        "shot_id_number",
        "game_season",
        "team_name",
        "date_of_game",
        "match_id",
        "team_id",
    ],
    inplace=True,
)
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 30697 entries, 0 to 30696 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 location_x 29236 non-null float64 1 location_y 29157 non-null float64 2 remaining_min 29135 non-null float64 3 power_of_shot 29211 non-null float64 4 remaining_sec 29103 non-null float64 5 distance_of_shot 29130 non-null float64 6 is_goal 24429 non-null float64 7 area_of_shot 29195 non-null object 8 shot_basics 29122 non-null object 9 range_of_shot 29133 non-null object 10 home/away 29200 non-null object 11 lat/lng 29132 non-null object 12 type_of_shot 15417 non-null object 13 type_of_combined_shot 15280 non-null object 14 remaining_min.1 29162 non-null float64 15 power_of_shot.1 29158 non-null float64 16 knockout_match.1 29204 non-null float64 17 remaining_sec.1 29158 non-null float64 18 distance_of_shot.1 29129 non-null float64 dtypes: float64(12), object(7) memory usage: 4.7+ MB
# Replace "/" and "." in column labels with "_" so every label is a plain
# identifier, then count the missing values in the target column.
df.rename(
    columns={
        "home/away": "home_away",
        "lat/lng": "lat_lng",
        "remaining_min.1": "remaining_min_1",
        "power_of_shot.1": "power_of_shot_1",
        "knockout_match.1": "knockout_match_1",
        "remaining_sec.1": "remaining_sec_1",
        "distance_of_shot.1": "distance_of_shot_1",
    },
    inplace=True,
)
df["is_goal"].isna().sum()
6268
# Replace missing targets with a placeholder. The result is assigned back
# instead of calling fillna(inplace=True) on a column selection, which can
# operate on a temporary copy and leave `df` unchanged.
df['is_goal'] = df['is_goal'].fillna(value='Unknown')
# Cast the target to bool. The column key must be 'is_goal'; the previous
# lookup of 'is _goal' (stray space) raised KeyError.
# NOTE(review): the non-empty string 'Unknown' is truthy, so every row whose
# outcome was missing is labelled True here. Consider dropping those rows
# instead if they should not count as goals.
df['is_goal'] = df['is_goal'].astype("bool")
df
| location_x | location_y | remaining_min | power_of_shot | remaining_sec | distance_of_shot | is_goal | area_of_shot | shot_basics | range_of_shot | home_away | lat_lng | type_of_shot | type_of_combined_shot | remaining_min_1 | power_of_shot_1 | knockout_match_1 | remaining_sec_1 | distance_of_shot_1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 167.0 | 72.0 | 10.0 | 1.0 | 27.0 | 38.0 | True | Right Side(R) | Mid Range | 16-24 ft. | MANU @ POR | 45.539131, -122.651648 | shot - 30 | NaN | 10.00 | 1.00 | 50.608 | 54.2000 | 38.0 |
| 1 | -157.0 | 0.0 | 10.0 | 1.0 | 22.0 | 35.0 | False | Left Side(L) | Mid Range | 8-16 ft. | MANU @ POR | 45.539131, -122.651648 | shot - 45 | NaN | 10.00 | 1.00 | 28.800 | 22.0000 | 35.0 |
| 2 | -101.0 | 135.0 | 7.0 | 1.0 | 45.0 | 36.0 | True | Left Side Center(LC) | Mid Range | 16-24 ft. | NaN | 45.539131, -122.651648 | shot - 25 | NaN | 92.64 | 1.00 | 0.000 | 63.7216 | 54.4 |
| 3 | 138.0 | 175.0 | 6.0 | 1.0 | 52.0 | 42.0 | False | Right Side Center(RC) | Mid Range | 16-24 ft. | MANU @ POR | 45.539131, -122.651648 | NaN | shot - 3 | NaN | 1.00 | 122.608 | 52.0000 | 42.0 |
| 4 | 0.0 | 0.0 | NaN | 2.0 | 19.0 | 20.0 | True | Center(C) | Goal Area | Less Than 8 ft. | MANU @ POR | 45.539131, -122.651648 | NaN | shot - 1 | 42.64 | 2.00 | 0.000 | 19.0000 | 20.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 30692 | 1.0 | 48.0 | 6.0 | 4.0 | 5.0 | 24.0 | False | Center(C) | NaN | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | shot - 1 | NaN | 17.20 | 4.00 | 1.000 | 5.0000 | 24.0 |
| 30693 | 0.0 | 0.0 | 6.0 | 4.0 | 5.0 | 20.0 | True | Center(C) | Goal Area | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | shot - 49 | NaN | 6.00 | 64.36 | 1.000 | 5.0000 | 20.0 |
| 30694 | -134.0 | 166.0 | 3.0 | 4.0 | 28.0 | 41.0 | True | Left Side Center(LC) | Mid Range | 16-24 ft. | MANU vs. IND | NaN | NaN | shot - 3 | 3.00 | 4.00 | 1.000 | 28.0000 | 41.0 |
| 30695 | 31.0 | 267.0 | 2.0 | 4.0 | 10.0 | 46.0 | False | Center(C) | Penalty Spot | NaN | MANU vs. IND | 42.982923, -71.446094 | shot - 26 | NaN | 2.00 | 112.36 | 1.000 | 10.0000 | 46.0 |
| 30696 | 1.0 | NaN | 0.0 | 4.0 | 39.0 | 27.0 | False | Center(C) | Goal Line | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | shot - 45 | NaN | 0.00 | 4.00 | 1.000 | 39.0000 | 27.0 |
30697 rows × 19 columns
# Build a pandas-profiling report for `df` in explorative mode and write it
# to an HTML file.
profile = pf.ProfileReport(df, explorative=True)
profile.to_file("Profiling_report.html")
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/pandas_profiling/model/correlations.py:55: UserWarning:
There was an attempt to calculate the cramers correlation, but this failed.
To hide this warning, disable the calculation
(using `df.profile_report(correlations={"cramers": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/pandas-profiling/pandas-profiling/issues
(include the error message: 'No data; `observed` has size 0.')
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/pandas_profiling/model/correlations.py:120: RuntimeWarning:
invalid value encountered in greater_equal
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
Export report to file: 0%| | 0/1 [00:00<?, ?it/s]
# Ensure `remaining_min` is stored as a float column, then show summary
# statistics of the frame.
df = df.astype({"remaining_min": "float"})
df.describe()
| location_x | location_y | remaining_min | power_of_shot | remaining_sec | distance_of_shot | remaining_min_1 | power_of_shot_1 | knockout_match_1 | remaining_sec_1 | distance_of_shot_1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 29236.000000 | 29157.000000 | 29135.000000 | 29211.000000 | 29103.000000 | 29130.000000 | 29162.000000 | 29158.000000 | 29204.000000 | 29158.000000 | 29129.000000 |
| mean | 7.383876 | 91.126933 | 4.883233 | 2.519359 | 28.329382 | 33.448884 | 18.204615 | 15.994109 | 16.599402 | 39.027303 | 38.801852 |
| std | 110.263049 | 87.676395 | 3.452533 | 1.153976 | 17.470663 | 9.369656 | 29.416973 | 29.676815 | 35.172016 | 29.835284 | 18.787711 |
| min | -250.000000 | -44.000000 | 0.000000 | 1.000000 | 0.000000 | 20.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 9.400000 |
| 25% | -68.000000 | 4.000000 | 2.000000 | 1.000000 | 13.000000 | 25.000000 | 3.000000 | 2.000000 | 0.000000 | 17.000000 | 26.000000 |
| 50% | 0.000000 | 74.000000 | 5.000000 | 3.000000 | 28.000000 | 35.000000 | 6.000000 | 3.000000 | 0.000000 | 35.000000 | 36.000000 |
| 75% | 95.000000 | 160.000000 | 8.000000 | 3.000000 | 43.000000 | 41.000000 | 11.000000 | 4.000000 | 1.000000 | 52.000000 | 44.000000 |
| max | 248.000000 | 791.000000 | 11.000000 | 7.000000 | 59.000000 | 99.000000 | 128.761600 | 118.360000 | 141.352320 | 144.785600 | 115.728000 |
df.shape
(30697, 19)
df.isna().sum()
location_x 1461 location_y 1540 remaining_min 1562 power_of_shot 1486 remaining_sec 1594 distance_of_shot 1567 is_goal 0 area_of_shot 1502 shot_basics 1575 range_of_shot 1564 home_away 1497 lat_lng 1565 type_of_shot 15280 type_of_combined_shot 15417 remaining_min_1 1535 power_of_shot_1 1539 knockout_match_1 1493 remaining_sec_1 1539 distance_of_shot_1 1568 dtype: int64
# Pairwise correlation of the numeric and boolean columns.
# The text columns are excluded explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently in DataFrame.corr() and raises instead, while
# older versions produce the same table as a bare df.corr().
df.select_dtypes(include=['number', 'bool']).corr()
| location_x | location_y | remaining_min | power_of_shot | remaining_sec | distance_of_shot | is_goal | remaining_min_1 | power_of_shot_1 | knockout_match_1 | remaining_sec_1 | distance_of_shot_1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| location_x | 1.000000 | -0.014907 | 0.007889 | -0.028939 | 0.001631 | 0.023483 | -0.004652 | 0.002110 | -0.000150 | 0.005408 | 0.006466 | 0.004695 |
| location_y | -0.014907 | 1.000000 | -0.074579 | 0.041263 | -0.050964 | 0.818727 | -0.119565 | -0.010678 | -0.000933 | 0.009672 | -0.022644 | 0.323209 |
| remaining_min | 0.007889 | -0.074579 | 1.000000 | -0.043992 | 0.026664 | -0.058495 | 0.023144 | 0.098414 | -0.004447 | -0.010549 | 0.016655 | -0.022925 |
| power_of_shot | -0.028939 | 0.041263 | -0.043992 | 1.000000 | 0.003066 | 0.046184 | -0.029246 | -0.018268 | 0.026339 | -0.003296 | 0.007004 | 0.023045 |
| remaining_sec | 0.001631 | -0.050964 | 0.026664 | 0.003066 | 1.000000 | -0.049875 | 0.025155 | 0.006628 | 0.002475 | 0.002258 | 0.464134 | -0.018455 |
| distance_of_shot | 0.023483 | 0.818727 | -0.058495 | 0.046184 | -0.049875 | 1.000000 | -0.159730 | -0.006094 | 0.000784 | 0.006642 | -0.025477 | 0.398123 |
| is_goal | -0.004652 | -0.119565 | 0.023144 | -0.029246 | 0.025155 | -0.159730 | 1.000000 | 0.008287 | -0.000745 | 0.001014 | 0.014997 | -0.057310 |
| remaining_min_1 | 0.002110 | -0.010678 | 0.098414 | -0.018268 | 0.006628 | -0.006094 | 0.008287 | 1.000000 | -0.000642 | 0.001780 | 0.019363 | -0.012790 |
| power_of_shot_1 | -0.000150 | -0.000933 | -0.004447 | 0.026339 | 0.002475 | 0.000784 | -0.000745 | -0.000642 | 1.000000 | -0.006052 | 0.006970 | 0.002725 |
| knockout_match_1 | 0.005408 | 0.009672 | -0.010549 | -0.003296 | 0.002258 | 0.006642 | 0.001014 | 0.001780 | -0.006052 | 1.000000 | 0.004459 | -0.000589 |
| remaining_sec_1 | 0.006466 | -0.022644 | 0.016655 | 0.007004 | 0.464134 | -0.025477 | 0.014997 | 0.019363 | 0.006970 | 0.004459 | 1.000000 | -0.008911 |
| distance_of_shot_1 | 0.004695 | 0.323209 | -0.022925 | 0.023045 | -0.018455 | 0.398123 | -0.057310 | -0.012790 | 0.002725 | -0.000589 | -0.008911 | 1.000000 |
Drop `type_of_shot` and `type_of_combined_shot` because they have the most NaN values (about half of the rows).
# Remove the two shot-type columns in place and display the resulting frame.
df.drop(columns=['type_of_shot', 'type_of_combined_shot'], inplace=True)
df
| location_x | location_y | remaining_min | power_of_shot | remaining_sec | distance_of_shot | is_goal | area_of_shot | shot_basics | range_of_shot | home_away | lat_lng | remaining_min_1 | power_of_shot_1 | knockout_match_1 | remaining_sec_1 | distance_of_shot_1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 167.0 | 72.0 | 10.0 | 1.0 | 27.0 | 38.0 | True | Right Side(R) | Mid Range | 16-24 ft. | MANU @ POR | 45.539131, -122.651648 | 10.00 | 1.00 | 50.608 | 54.2000 | 38.0 |
| 1 | -157.0 | 0.0 | 10.0 | 1.0 | 22.0 | 35.0 | False | Left Side(L) | Mid Range | 8-16 ft. | MANU @ POR | 45.539131, -122.651648 | 10.00 | 1.00 | 28.800 | 22.0000 | 35.0 |
| 2 | -101.0 | 135.0 | 7.0 | 1.0 | 45.0 | 36.0 | True | Left Side Center(LC) | Mid Range | 16-24 ft. | NaN | 45.539131, -122.651648 | 92.64 | 1.00 | 0.000 | 63.7216 | 54.4 |
| 3 | 138.0 | 175.0 | 6.0 | 1.0 | 52.0 | 42.0 | False | Right Side Center(RC) | Mid Range | 16-24 ft. | MANU @ POR | 45.539131, -122.651648 | NaN | 1.00 | 122.608 | 52.0000 | 42.0 |
| 4 | 0.0 | 0.0 | NaN | 2.0 | 19.0 | 20.0 | True | Center(C) | Goal Area | Less Than 8 ft. | MANU @ POR | 45.539131, -122.651648 | 42.64 | 2.00 | 0.000 | 19.0000 | 20.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 30692 | 1.0 | 48.0 | 6.0 | 4.0 | 5.0 | 24.0 | False | Center(C) | NaN | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | 17.20 | 4.00 | 1.000 | 5.0000 | 24.0 |
| 30693 | 0.0 | 0.0 | 6.0 | 4.0 | 5.0 | 20.0 | True | Center(C) | Goal Area | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | 6.00 | 64.36 | 1.000 | 5.0000 | 20.0 |
| 30694 | -134.0 | 166.0 | 3.0 | 4.0 | 28.0 | 41.0 | True | Left Side Center(LC) | Mid Range | 16-24 ft. | MANU vs. IND | NaN | 3.00 | 4.00 | 1.000 | 28.0000 | 41.0 |
| 30695 | 31.0 | 267.0 | 2.0 | 4.0 | 10.0 | 46.0 | False | Center(C) | Penalty Spot | NaN | MANU vs. IND | 42.982923, -71.446094 | 2.00 | 112.36 | 1.000 | 10.0000 | 46.0 |
| 30696 | 1.0 | NaN | 0.0 | 4.0 | 39.0 | 27.0 | False | Center(C) | Goal Line | Less Than 8 ft. | MANU vs. IND | 42.982923, -71.446094 | 0.00 | 4.00 | 1.000 | 39.0000 | 27.0 |
30697 rows × 17 columns
Conclusion: when `location_y` exceeds 250, the chance of a goal decreases.
# Scatter plot of the shot coordinates, coloured by whether the shot scored.
df_loc = df.loc[:, ['location_x', 'location_y', 'is_goal']]
fig = px.scatter(
    data_frame=df_loc,
    x="location_x",
    y="location_y",
    color="is_goal",
    hover_data=['is_goal'],
)
fig.show()
Goals and misses both occur in every time interval, so it is difficult to determine the outcome from the remaining time alone.
# Outcome plotted against the remaining seconds and then the remaining
# minutes. Both figures read the same three-column slice, so it is built once.
df_time = df.loc[:, ['remaining_min', 'remaining_sec', 'is_goal']]
fig = px.scatter(
    data_frame=df_time,
    x="remaining_sec",
    y="is_goal",
    color="is_goal",
    hover_data=['is_goal'],
)
fig.show()
fig = px.scatter(
    data_frame=df_time,
    x="remaining_min",
    y="is_goal",
    color="is_goal",
    hover_data=['is_goal'],
)
fig.show()
df['power_of_shot'].unique()
array([ 1., 2., 3., nan, 4., 5., 6., 7.])
Most of the goals are scored with a `power_of_shot` between 1 and 3.
px.box(df, x="power_of_shot", y="is_goal", points="all")
The best shot distance for a goal is between 20 and 40. Moreover, when the distance exceeds 63, the chance of a goal decreases.
# Box plot of the shot distance against the outcome.
px.box(data_frame=df, x="distance_of_shot", y="is_goal")
# Bar charts of how often each shot area occurs: first for the rows where
# is_goal is True, then for the rows where it is False.
df.loc[df['is_goal'], ['area_of_shot']].value_counts().iplot(kind='bar')
df.loc[~df['is_goal'], ['area_of_shot']].value_counts().iplot(kind='bar')
For a distance between 20 and 40 and a power of shot between 1 and 4, the area of shot should be the center, the left side, or the right side.
# Keep only the rows with a known shot area and plot power against distance,
# coloured by that area.
t = df.dropna(subset=['area_of_shot'])
px.scatter(
    t,
    x='power_of_shot',
    y='distance_of_shot',
    color='area_of_shot',
)
# Count how often each distance value occurs within every range label.
(
    df[['range_of_shot', 'distance_of_shot']]
    .groupby('range_of_shot')
    .value_counts()
)
range_of_shot distance_of_shot
16-24 ft. 37.0 1332
36.0 1269
38.0 1265
39.0 1230
40.0 1101
...
Less Than 8 ft. 22.0 511
25.0 486
23.0 349
24.0 333
28.0 3
Length: 82, dtype: int64
df['range_of_shot'].unique()
array(['16-24 ft.', '8-16 ft.', 'Less Than 8 ft.', '24+ ft.', nan,
'Back Court Shot'], dtype=object)
# Take an independent two-column copy so later operations on `temp` cannot
# touch `df`.
temp = df[['range_of_shot','distance_of_shot']].copy()
# Shows the rows where both values are present. The result is not assigned,
# so `temp` itself still contains the rows with NaN.
temp.dropna()
| range_of_shot | distance_of_shot | |
|---|---|---|
| 0 | 16-24 ft. | 38.0 |
| 1 | 8-16 ft. | 35.0 |
| 2 | 16-24 ft. | 36.0 |
| 3 | 16-24 ft. | 42.0 |
| 4 | Less Than 8 ft. | 20.0 |
| ... | ... | ... |
| 30691 | Less Than 8 ft. | 20.0 |
| 30692 | Less Than 8 ft. | 24.0 |
| 30693 | Less Than 8 ft. | 20.0 |
| 30694 | 16-24 ft. | 41.0 |
| 30696 | Less Than 8 ft. | 27.0 |
27655 rows × 2 columns
temp.groupby('range_of_shot').min()
| distance_of_shot | |
|---|---|
| range_of_shot | |
| 16-24 ft. | 36.0 |
| 24+ ft. | 42.0 |
| 8-16 ft. | 28.0 |
| Back Court Shot | 60.0 |
| Less Than 8 ft. | 20.0 |
temp.groupby('range_of_shot').max()
| distance_of_shot | |
|---|---|
| range_of_shot | |
| 16-24 ft. | 43.0 |
| 24+ ft. | 65.0 |
| 8-16 ft. | 36.0 |
| Back Court Shot | 99.0 |
| Less Than 8 ft. | 28.0 |
# Pairwise scatter plots of the columns at positions 3 through 7, drawn on a
# 1500 x 1500 canvas.
fig = px.scatter_matrix(df.iloc[:, 3:8])
fig.update_layout(width=1500, height=1500, paper_bgcolor="LightSteelBlue")
from sklearn import preprocessing
from sklearn.impute import KNNImputer

# Fill the gaps in the float64 columns with a 4-nearest-neighbour imputer.
imputer = KNNImputer(n_neighbors=4)
temp_df = df.select_dtypes(include='float64')
temp_df = pd.DataFrame(imputer.fit_transform(temp_df), columns=temp_df.columns)

# Apply a quantile transform to the imputed columns and wrap the resulting
# array in a DataFrame that carries the same column labels.
df_scale = preprocessing.QuantileTransformer().fit_transform(temp_df)
df_scale = pd.DataFrame(df_scale, columns=temp_df.columns)

# Box plot of each transformed column, followed by an annotated heatmap of
# their correlation matrix.
df_scale.iplot(kind='box')
fig = px.imshow(df_scale.corr(), text_auto=True)
fig.update_layout(width=1000, height=1000, paper_bgcolor="LightSteelBlue")
from sklearn.feature_selection import f_classif

# ANOVA F-test of every scaled feature against the goal label.
# scikit-learn expects a 1-D target of shape (n_samples,). The previous
# reshape(-1, 1) produced a column vector and triggered a
# DataConversionWarning on every call that received Y.
Y = df['is_goal'].values
X = df_scale
f, p = f_classif(X, Y)
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
df_scale.columns
Index(['location_x', 'location_y', 'remaining_min', 'power_of_shot',
'remaining_sec', 'distance_of_shot', 'remaining_min_1',
'power_of_shot_1', 'knockout_match_1', 'remaining_sec_1',
'distance_of_shot_1'],
dtype='object')
p
array([1.35970442e-001, 9.56198300e-098, 6.35356916e-006, 4.81045742e-007,
1.39147234e-006, 1.54081332e-168, 1.14017425e-004, 2.32936418e-003,
8.97978807e-001, 2.02747370e-004, 3.72790021e-076])
px.bar(f)
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn import metrics
from sklearn.metrics import auc
def apply_f_classif(x, y, k):
    """Select the ``k`` best features of ``x`` by ``f_classif`` score against ``y``.

    The selected columns are returned as a new DataFrame with default
    integer column labels.
    """
    selector = SelectKBest(score_func=f_classif, k=k)
    return pd.DataFrame(selector.fit_transform(x, y))
def logistic_fn(x_train, y_train):
    """Fit a logistic-regression classifier with the SAGA solver.

    Args:
        x_train: Training feature matrix.
        y_train: Training labels; a 1-D array or an (n, 1) column vector.

    Returns:
        The fitted ``LogisticRegression`` model.
    """
    model = LogisticRegression(solver='saga')
    # Flatten the target so an (n, 1) column vector does not trigger sklearn's
    # DataConversionWarning; 1-D input passes through unchanged.
    model.fit(x_train, np.ravel(y_train))
    return model
# Maps an experiment label to the metrics dict returned by build_model.
result_dict = {}
from sklearn.model_selection import train_test_split
def build_model(Y,
                features,
                X,
                preprocess_fn,
                *hyperparameters):
    """Preprocess the features, fit a logistic model and score it on a hold-out split.

    Args:
        Y: Target labels; a 1-D array or an (n, 1) column vector.
        features: Not used by this function; kept so existing callers that
            pass it positionally keep working.
        X: Feature matrix handed to ``preprocess_fn``.
        preprocess_fn: Callable invoked as ``preprocess_fn(X, y, *hyperparameters)``
            that returns the feature matrix to train on.
        *hyperparameters: Extra positional arguments forwarded to ``preprocess_fn``.

    Returns:
        dict with keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'roc_auc'``, all computed on the 20% test split.
    """
    # sklearn estimators and metrics expect a 1-D target; flattening here
    # avoids the repeated DataConversionWarning for (n, 1) input.
    y = np.ravel(Y)

    # NOTE(review): preprocess_fn sees the full data set before the split, so
    # a supervised selector can leak test information — consider fitting it on
    # the training fold only.
    X = preprocess_fn(X, y, *hyperparameters)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = logistic_fn(x_train, y_train)
    y_pred = model.predict(x_test)

    # The previous pos_label=2 matched no label in y_test, so every TPR came
    # out NaN; the default positive label is used instead.
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
    print(fpr, tpr)

    # 'accuracy' used to hold the ROC AUC; it now holds the real accuracy and
    # the AUC stays available under its own key.
    acc = metrics.accuracy_score(y_test, y_pred)
    roc_auc = metrics.roc_auc_score(y_test, y_pred)
    prec = metrics.precision_score(y_test, y_pred)
    # recall_score is not imported at file level here, so go through `metrics`.
    recall = metrics.recall_score(y_test, y_pred)

    return {'accuracy': acc,
            'precision': prec,
            'recall': recall,
            'roc_auc': roc_auc}
# Feature names handed to build_model.
# NOTE(review): this drops the last column while X still contains it, and
# build_model does not appear to use the argument — confirm the intent.
FEATURES = list(df_scale.columns[:-1])
result_dict = {}
# Evaluate every possible number of selected features, from 1 up to all
# columns of X. Deriving the bound from X keeps the sweep valid if the
# feature set changes (SelectKBest rejects k larger than the column count).
for i in range(1, X.shape[1] + 1):
    result_dict['f_classif - ' + str(i)] = build_model(Y,
                                                       FEATURES,
                                                       X,
                                                       apply_f_classif,
                                                       i)
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
[0. 0.71482085 1. ] [nan nan nan] [0. 0.70553746 1. ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
[0. 0.70749186 1. ] [nan nan nan] [0. 0.72166124 1. ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
[0. 0.72459283 1. ] [nan nan nan] [0. 0.7247557 1. ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
[0. 0.71710098 1. ] [nan nan nan] [0. 0.71726384 1. ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless
[0. 0.73355049 1. ] [nan nan nan] [0. 0.72345277 1. ] [nan nan nan] [0. 0.7267101 1. ] [nan nan nan]
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). /home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/metrics/_ranking.py:999: UndefinedMetricWarning: No positive samples in y_true, true positive value should be meaningless
# Accumulates the 'accuracy' value of every experiment printed by
# compare_results, in iteration order.
li = []


def compare_results(result_dict):
    """Print the metrics of each experiment and record its accuracy in ``li``.

    Args:
        result_dict: Mapping of experiment label to a dict with the keys
            'accuracy', 'precision' and 'recall'.
    """
    report_lines = (
        ("accuracy_score : ", 'accuracy'),
        ("precision_score : ", 'precision'),
        ("recall_score : ", 'recall'),
    )
    for test_name, scores in result_dict.items():
        print('Test: ', test_name)
        print()
        # Recorded before printing, so the module-level list grows even if a
        # later metric lookup fails.
        li.append(scores['accuracy'])
        for label, metric_key in report_lines:
            print(label, scores[metric_key])
        print()
# Print the metrics of every experiment and collect their 'accuracy' values in li.
compare_results(result_dict)
Test: f_classif - 1 accuracy_score : 0.5365195451448821 precision_score : 0.5862383230804283 recall_score : 0.7468795355587808 Test: f_classif - 2 accuracy_score : 0.545386405803008 precision_score : 0.5937211449676824 recall_score : 0.7452912199362504 Test: f_classif - 3 accuracy_score : 0.5440874613027396 precision_score : 0.6010589318600368 recall_score : 0.7453611190408221 Test: f_classif - 4 accuracy_score : 0.5382528095567639 precision_score : 0.5820356578650417 recall_score : 0.7556401992382069 Test: f_classif - 5 accuracy_score : 0.5342773761095279 precision_score : 0.5740615868734547 recall_score : 0.7553978112984324 Test: f_classif - 6 accuracy_score : 0.542633584963786 precision_score : 0.5887640449438202 recall_score : 0.762292697119581 Test: f_classif - 7 accuracy_score : 0.5402292912741782 precision_score : 0.5870997047467635 recall_score : 0.75254730713246 Test: f_classif - 8 accuracy_score : 0.5491327495086207 precision_score : 0.5994550408719346 recall_score : 0.7599309153713298 Test: f_classif - 9 accuracy_score : 0.5421240303289414 precision_score : 0.5814831261101243 recall_score : 0.7712014134275619 Test: f_classif - 10 accuracy_score : 0.547833967626939 precision_score : 0.590274651058082 recall_score : 0.7657710280373832 Test: f_classif - 11 accuracy_score : 0.5456807518388342 precision_score : 0.5844912595248767 recall_score : 0.7675103001765744
# Line chart of the 'accuracy' values gathered by compare_results, in the
# order the experiments were iterated.
px.line(li)
from sklearn.feature_selection import SelectKBest

# Keep the 7 features with the highest ANOVA F-score.
select_features = SelectKBest(f_classif, k=7)
# Flatten the target so an (n, 1) column vector does not raise sklearn's
# DataConversionWarning.
X_new = select_features.fit_transform(X, np.ravel(Y))
X_new = pd.DataFrame(X_new)
# Ask the selector which input columns it kept instead of comparing column
# values pairwise: the boolean mask is exact, preserves the original column
# order, and cannot double-count columns that happen to hold equal values.
selected_features = list(X.columns[select_features.get_support()])
selected_features
/home/mtech/anaconda3/envs/idp/lib/python3.8/site-packages/sklearn/utils/validation.py:993: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
['location_y', 'remaining_min', 'power_of_shot', 'remaining_sec', 'distance_of_shot', 'remaining_min_1', 'distance_of_shot_1']